1. R setup

# Import libraries
library(tidyverse) # collection of R packages designed for data science
# Vector
library(sf) ## GIS vector library new
library(sp) ## GIS vector library old
# Raster
library(raster) ## GIS raster library 
# Visualization
library(RColorBrewer) ## ready-to-use color palettes for creating beautiful graphics
library(rmapshaper) ## used to simplify geometries
library(tmap) ## easy to use approach to to create theamtic maps
library(mapview) ## interactive visualisations of spatial data
library(classInt) ## methods for choosing univariate class intervals for mapping or other graphics purposes

# Number formatting
options(scipen = 1000000)
options(digits = 6)

2. Import data

2.1 Public transportation stops

The public transportation stops dataset was downloded here:
https://data.geo.admin.ch/ch.bav.haltestellen-oev/data.zip
Release date: 06-08-2018
Format: csv

# Import csv as df
pts_df <- read.csv("./Data_in/Betriebspunkt.csv", stringsAsFactors=FALSE, header= TRUE) 

# Select desired attributes
pts_df <- pts_df %>%
  dplyr::select(pts_ID = "Nummer", pts_MunNr = "GdeNummer", "y_Koord_Ost", "x_Koord_Nord") 
# !! Thre is a small mistake in this dataset: The X and Y values are swapped. Correctly, the attributes should be named as follows: x_Koord_Ost, y_Koord_Nord. 

# Convert df to spatial df
pts_sf <- sf::st_as_sf(x = pts_df, coords = c("y_Koord_Ost", "x_Koord_Nord"), crs= 2056) # epsg:2056 is the ID of LV95

# Check data
class(pts_df)
## [1] "data.frame"
pts_df[1:5,]
##    pts_ID pts_MunNr y_Koord_Ost x_Koord_Nord
## 1 8508186       338     2628463      1220751
## 2 8587698      6173     2643683      1132003
## 3 8531188      6252     2613318      1109828
## 4 8530051      5409     2572770      1129980
## 5 8501442      6057     2650331      1141959

class(pts_sf)
## [1] "sf"         "data.frame"
pts_sf[1:5,]
## Simple feature collection with 5 features and 2 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 2572770 ymin: 1109830 xmax: 2650330 ymax: 1220750
## epsg (SRID):    2056
## proj4string:    +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
##    pts_ID pts_MunNr                geometry
## 1 8508186       338 POINT (2628463 1220751)
## 2 8587698      6173 POINT (2643683 1132003)
## 3 8531188      6252 POINT (2613318 1109828)
## 4 8530051      5409 POINT (2572770 1129980)
## 5 8501442      6057 POINT (2650331 1141959)

2.2 Municipality

The municipality dataset was downloded here:
https://shop.swisstopo.admin.ch/de/products/landscape/boundaries3D
Release date: 2019
Format: Shapefile

# Import shp as spatial df
mun_sf <- sf::st_read("./Data_in/swissBOUNDARIES3D_1_3_TLM_HOHEITSGEBIET.shp", stringsAsFactors = FALSE, crs=2056)
## Reading layer `swissBOUNDARIES3D_1_3_TLM_HOHEITSGEBIET' from data source `C:\gitrepos\R_AS_GIS\Data_in\swissBOUNDARIES3D_1_3_TLM_HOHEITSGEBIET.shp' using driver `ESRI Shapefile'
## Simple feature collection with 2361 features and 23 fields
## geometry type:  POLYGON
## dimension:      XYZ
## bbox:           xmin: 2485410 ymin: 1075270 xmax: 2833860 ymax: 1295930
## epsg (SRID):    2056
## proj4string:    +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs

# Select desired attributes
mun_sf <- mun_sf %>%
  dplyr::select(mun_MunNr = "BFS_NUMMER", mun_CanNr = "KANTONSNUM") %>%
  dplyr::group_by(mun_MunNr)%>%
  dplyr::summarize(
    mun_CanNr = unique(mun_CanNr)
    ) %>%
  st_zm(drop =  TRUE)

# Check data
class(mun_sf)
## [1] "sf"         "tbl_df"     "tbl"        "data.frame"
mun_sf[1:5,]
## Simple feature collection with 5 features and 2 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 2673830 ymin: 1230190 xmax: 2686460 ymax: 1243160
## epsg (SRID):    2056
## proj4string:    +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## # A tibble: 5 x 3
##   mun_MunNr mun_CanNr                                              geometry
##       <int>     <int>                                         <POLYGON [m]>
## 1         1         1 ((2680492 1235035, 2680482 1235041, 2680473 1235046,~
## 2         2         1 ((2674714 1236942, 2674699 1237008, 2674689 1237061,~
## 3         3         1 ((2679008 1241960, 2679015 1241965, 2679022 1241971,~
## 4         4         1 ((2686028 1230192, 2686026 1230195, 2686024 1230205,~
## 5         5         1 ((2678523 1238540, 2678511 1238544, 2678431 1238561,~

# Plot imported data: R base plot
plot(st_geometry(pts_sf), pch = 19, col="blue", cex = 0.5)
plot(st_geometry(mun_sf), add = TRUE)
legend(x=2750000,y=1310000,
       c("Public Transportation Stops","Muncipality"),
       lty=c(NA,1),
       pch=c(19,NA),
       cex=.8, 
       col=c("blue","black"),
       bty='n'
       )

3. Calculate density of public transportation stops per municipality

# Spatial Join: instead of joining dataframes via an equal ID we join data- frames based on an equal location. 
spjoin_sf <- sf::st_join(pts_sf, mun_sf)

spjoin_sf
## Simple feature collection with 26895 features and 4 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 2486200 ymin: 1075520 xmax: 2832000 ymax: 1294170
## epsg (SRID):    2056
## proj4string:    +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## First 10 features:
##     pts_ID pts_MunNr mun_MunNr mun_CanNr                geometry
## 1  8508186       338       338         2 POINT (2628463 1220751)
## 2  8587698      6173      6173        23 POINT (2643683 1132003)
## 3  8531188      6252      6252        23 POINT (2613318 1109828)
## 4  8530051      5409      5409        22 POINT (2572770 1129980)
## 5  8501442      6057      6057        23 POINT (2650331 1141959)
## 6  8583656      6702      6702        26 POINT (2584548 1246665)
## 7  8590028       351       351         2 POINT (2599934 1200621)
## 8  8579143      5871      5871        22 POINT (2509845 1163460)
## 9  8572195      2788      2788        13 POINT (2599991 1249781)
## 10 8576536       576       576         2 POINT (2643212 1163308)

# Check: Did the Federal Office of Transport use the same municiaplity boundaries as we did in order to add the municipality inforamtion to the dataset (pts_sf$pts_MunNr)?
spjoin_sf_check <-  spjoin_sf %>%
  dplyr::select(mun_MunNr,pts_MunNr) %>%
  dplyr::mutate (check = (spjoin_sf$mun_MunNr == spjoin_sf$pts_MunNr)) %>%
  dplyr::arrange(check)

table(spjoin_sf_check$check) #462/26408*100 = 1.75%
## 
## FALSE  TRUE 
##   475 26395
# ... no, they did not. Probably they used the same dataset with a different reference date.   

# Density calculation

# > 1. Count points per polygon
pts_count <- spjoin_sf %>%
   dplyr::group_by(mun_MunNr) %>%
   dplyr::summarise(count=n())

mun_sf <- mun_sf %>%
  dplyr::left_join(pts_count %>% st_set_geometry(NULL) , by = c("mun_MunNr" ))

# > 2. Calculate area of polygon
mun_sf <- mun_sf %>%
  dplyr::mutate(mun_area_m2 =as.vector(sf::st_area(.)))

# > 3. Calculate density: count/area
mun_sf$density <- mun_sf$count / mun_sf$mun_area_m2 * 1000000

# Plot result: tmap
# > tmap static
tmap::tmap_mode("plot")
#tmap_mode("view")
tmap::tm_shape(mun_sf) +
  tmap::tm_fill("density",
          title="Density of public transportation stops",
          style="quantile", 
          palette="BuGn", 
          colorNA = "blue")  +
  tmap::tm_layout(frame = FALSE)

4. Calculate density of public transportation stops per canton

# group_by mun_CanNr
canton_sf <- mun_sf %>%
  dplyr::select(mun_CanNr,  count, mun_area_m2) %>%
  dplyr::group_by(mun_CanNr)%>%
  dplyr::summarize(
    count = sum(count, na.rm = TRUE),
    mun_area_m2 = sum(mun_area_m2,  na.rm = TRUE)
    )  %>%
  dplyr::mutate(
    density = round((count/mun_area_m2 * 1000000),1)
    )

# Plot result: ggplot
# Get quantile breaks. Add .00001 offset to catch the lowest value
breaks_qt <- classInt::classIntervals(c(min(canton_sf$density) - .00001, canton_sf$density), n = 4, style = "quantile")
breaks_qt$brks
## [1] 0.19999 0.50000 0.75000 1.00000 7.80000

# Use cut to divice density into intervals and code them according to which interval they are in.
canton_sf <- canton_sf %>%
   dplyr::mutate(mycat = cut(density, breaks_qt$brks)) %>%
   dplyr::arrange(density)
canton_sf$mycat
##  [1] (0.2,0.5]  (0.2,0.5]  (0.2,0.5]  (0.2,0.5]  (0.2,0.5]  (0.2,0.5] 
##  [7] (0.2,0.5]  (0.2,0.5]  (0.5,0.75] (0.5,0.75] (0.5,0.75] (0.5,0.75]
## [13] (0.5,0.75] (0.75,1]   (0.75,1]   (0.75,1]   (0.75,1]   (0.75,1]  
## [19] (0.75,1]   (0.75,1]   (0.75,1]   (1,7.8]    (1,7.8]    (1,7.8]   
## [25] (1,7.8]    (1,7.8]    (1,7.8]   
## Levels: (0.2,0.5] (0.5,0.75] (0.75,1] (1,7.8]

ggplot2::ggplot(canton_sf) + 
    geom_sf(aes(fill=mycat)) +
    scale_fill_brewer(palette = "BuGn", name = "Density of public\ntransportation stops")  + 
    coord_sf(datum=NA) + # no coordinate grid
    theme_bw() +# background = white
    theme(
      panel.border = element_blank() # no border line around plot
      )

5. Filter by canton: Freiburg

# Filter mun_CanNr == 10 
pts_freiburg<- spjoin_sf %>%
    dplyr::filter(mun_CanNr == 10)

canton_freiburg <- canton_sf %>%
  dplyr::filter(mun_CanNr == 10)

# Plot result: R base plot
plot(st_geometry(pts_freiburg), pch = 19, col="blue", cex = 0.1)
plot(st_geometry(canton_freiburg), lwd = 2, add = TRUE)
legend(x=2587000 ,y=1206350,
       c("Public Transportation Stops","Muncipality"),
       lty=c(NA,1),
       pch=c(19,NA),
       cex=.8, 
       col=c("blue","black"),
       bty='n'
       )

# This plot contains a lot of data as it contains a lot of coordinates. Do we really need this level of detail for our visualization? Often we don't. 

# Reduce level of detail
canton_freiburg_gen <- rmapshaper::ms_simplify(canton_freiburg, keep = 0.01, keep_shapes = TRUE)
plot(st_geometry(pts_freiburg), pch = 19, col="blue", cex = 0.1)
plot(st_geometry(canton_freiburg_gen), lwd = 2, add = TRUE)
legend(x=2587000 ,y=1206350,
       c("Public Transportation Stops","Muncipality"),
       lty=c(NA,1),
       pch=c(19,NA),
       cex=.8, 
       col=c("blue","black"),
       bty='n'
       )

6. Calculate number of public transportation stops per raster cells

# We will use the sp package to carry out this analysis because point pattern analysis is still more established for this package. 

# sf to sp
pts_sp <- as(pts_sf, Class = "Spatial")

# Create grid with raster cell size 10'000 x 10'000m
boundingbox_pts_sp = as(extent(pts_sp), "SpatialPolygons")
r = raster::raster(boundingbox_pts_sp, resolution = 10000)
crs(r) <- CRS('+init=EPSG:2056')

# Count points per raster cell 
rc = raster::rasterize(pts_sp@coords, r, fun = "count")

rc
## class       : RasterLayer 
## dimensions  : 22, 35, 770  (nrow, ncol, ncell)
## resolution  : 10000, 10000  (x, y)
## extent      : 2486195, 2836195, 1074167, 1294167  (xmin, xmax, ymin, ymax)
## coord. ref. : +init=EPSG:2056 +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs 
## data source : in memory
## names       : layer 
## values      : 1, 582  (min, max)

# Plot result: raster plot
raster::plot(rc, 
     bty="n", 
     box=FALSE, 
     xaxt = "n", 
     yaxt = "n", 
     legend.args = list(text = 'Count', cex = 0.8, line = 1, font = 2),
      main="Number of public transportation stops per raster cell")

# As many as 8 percent of men and 0.5 percent of women with Northern European ancestry have the common form of red-green color blindness. Thus,  red and green colours should not be used together (http://east.nei.nih.gov:443/learn-about-eye-health/eye-conditions-and-diseases/color-blindness)

my.palette = RColorBrewer::brewer.pal(n = 9, name = "YlGnBu")
raster::plot(rc, 
             col = my.palette, 
             bty="n", 
             box=FALSE,  
             xaxt = "n", yaxt = "n", 
             legend.args = list(text = 'Count', cex = 0.8, line = 1, font = 2), 
             main="Number of public transportation stops per raster cell")

7. Count number of public transportation stops in a defined area

# Somdbody asked us to calculate the number of public transportation stops 500m around this coordinate: 
N <- 47.37861
E <- 8.53768

# Create df
mypoint_df <-  data.frame(N,E)

# Convert df to spatial df
mypoint_sf <- sf::st_as_sf(x = mypoint_df, coords = c("E", "N"), crs= 4326) %>%  #epsg:4326 is the ID of WGS84
  sf::st_transform(2056)

# Calculate area 500 m around mypoint_sf
myarea_sf <- mypoint_sf %>%
  sf::st_buffer(500) %>%
  dplyr::mutate(name = "MyArea")

# Count points per myarea_sf
spjoin_sf <- st_join(pts_sf, myarea_sf)

pts_count  <- spjoin_sf %>% 
  dplyr::group_by(name) %>%
  dplyr::summarise(count=n())

pts_count
## Simple feature collection with 2 features and 2 fields
## geometry type:  MULTIPOINT
## dimension:      XY
## bbox:           xmin: 2486200 ymin: 1075520 xmax: 2832000 ymax: 1294170
## epsg (SRID):    2056
## proj4string:    +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +towgs84=674.374,15.056,405.346,0,0,0,0 +units=m +no_defs
## # A tibble: 2 x 3
##   name   count                                                     geometry
## * <chr>  <int>                                             <MULTIPOINT [m]>
## 1 MyArea    20 (2682739 1247828, 2682785 1248499, 2682843 1247879, 2682890~
## 2 <NA>   26875 (2486195 1111396, 2486530 1112028, 2486730 1111962, 2486957~

# Plot result: R base plot
plot(st_geometry(myarea_sf, col="grey",border="black"))
plot(st_geometry(mypoint_sf), pch = 19, col="blue", cex =1, add = TRUE)
plot(st_geometry(pts_sf), pch = 19, col="red", cex =0.5, add = TRUE)

8. Export Data

# shp
sf::st_write(canton_freiburg_gen, "./Data_out/canton_freiburg_gen.shp", delete_layer = TRUE)
## Deleting layer `canton_freiburg_gen' using driver `ESRI Shapefile'
## Writing layer `canton_freiburg_gen' to data source `./Data_out/canton_freiburg_gen.shp' using driver `ESRI Shapefile'
## features:       1
## fields:         5
## geometry type:  Multi Polygon

# json
path_4326 = "./Data_out/mun_join_pt_sf_freiburg_gen.json"
file.remove(path_4326)
## [1] TRUE
sf::st_write(canton_freiburg_gen %>% st_transform(4326), path_4326, driver="GeoJSON")
## Writing layer `mun_join_pt_sf_freiburg_gen' to data source `./Data_out/mun_join_pt_sf_freiburg_gen.json' using driver `GeoJSON'
## features:       1
## fields:         5
## geometry type:  Multi Polygon

# tif
raster::writeRaster(rc, filename= "./Data_out/rc.tif", format="GTiff", overwrite=TRUE)

9. Which canton has the longest border?

# Calculate line length
canton_line_sf <- canton_sf %>%
   sf::st_cast("MULTILINESTRING") %>%
   dplyr::mutate(length_km = round(as.vector(as.vector(sf::st_length(.)))/1000,digits = 0))

canton_sf <-canton_sf %>%
 dplyr::mutate(
 length_km =   canton_line_sf$length_km
  ) %>%
 dplyr::arrange(desc(length_km))

# Print result
mapview::mapview(canton_sf,
           map.types = c("OpenStreetMap")
          )